home *** CD-ROM | disk | FTP | other *** search
- From: turner@imagen.UUCP <talcott!topaz!Shasta!imagen!Jim.Turner>
- Subject: unc - 68000 disassembler
- Newsgroups: mod.sources
- Approved: jpn@panda.UUCP
-
- Mod.sources: Volume 4, Issue 30
- Submitted by: turner@imagen.UUCP <talcott!topaz!Shasta!imagen!Jim.Turner>
-
- #! /bin/sh
- # This is a shell archive, meaning:
- # 1. Remove everything above the #! /bin/sh line.
- # 2. Save the resulting text in a file.
- # 3. Execute the file with /bin/sh (not csh) to create the files:
- # README
- # doc
- # doc.out
- # makefile
- # unc.h
- # alloc.c
- # file.c
- # heur.c
- # This archive created: Fri Mar 14 09:54:34 1986
- export PATH; PATH=/bin:$PATH
- echo shar: extracting "'README'" '(806 characters)'
- if test -f 'README'
- then
- echo shar: will not over-write existing file "'README'"
- else
- cat << \SHAR_EOF > 'README'
- I have gotten many many requests to email this or post it, because of
- its size email'ing it screws up most mailers, so i am submitting it to
- mod.sources to be posted. Please note a major caveat with this, it was
- written under Unisoft's port of Unix so the a.out file that it uses
- more closely resembles the b.out.h file that most cross assemblers
- (e.g. greenhills) use. For the obvious reasons i have not included that
- file with the posting. I did not write this nor do i make any claim to
- that effect.
-
- turner@imagen.UUCP <talcott!topaz!Shasta!imagen!Jim.Turner>
-
- ----------------------------
- This is the 68000 disassembler mentioned on the net.
- It is not my final version by any means, but I have found it extremely
- useful and it represents several weeks' work.
-
- John Collins. <jmc@inset.UUCP>
- SHAR_EOF
- if test 806 -ne "`wc -c < 'README'`"
- then
- echo shar: error transmitting "'README'" '(should have been 806 characters)'
- fi
- fi
- echo shar: extracting "'doc'" '(6445 characters)'
- if test -f 'doc'
- then
- echo shar: will not over-write existing file "'doc'"
- else
- cat << \SHAR_EOF > 'doc'
- .\"/*% nroff -cm -rL72 %|epson|spr -f plain.a -h uncdoc -w
- .nr Hb 7
- .nr Hs 3
- .ds HF 3 3 3 3 3 3 3
- .nr Hu 5
- .nr Hc 1
- .SA 1
- .PH "''A Disassembler''"
- .PF "'Issue %I%'- Page \\\\nP -'%G%'"
- .H 1 "Introduction"
- This document describes the first release of a disassembler for UNIX
- executable files.
- The key features are:
- .AL
- .LI
- For object files the output can be assembled to generate the same
- object module, (apart from minor variations in symbol table ordering) as the
- input.
- .LI
- For stripped executable files object modules and libraries may be scanned,
- modules in the main input identified and the appropriate names automatically
- inserted into the output.
- .LI
- An option is available to convert most non-global names into local symbols,
- which cuts down the symbols in the generated assembler file.
- .LI
- The disassembler copes reasonably with modules merged with the
- .B "-r"
- option to
- .B "ld" ,
- generating a warning message as to the number of modules involved.
- .LE
- .P
- At present this is available for certain Motorola 68000 ports of UNIX
- System III and System V. Dependencies on
- .AL a
- .LI
- Instruction set.
- .LI
- Object module format.
- .LI
- Library module format.
- .LI
- Assembler output format.
- .LE
- .P
- are hopefully sufficiently localised to make the product useful as a
- basis for other disassemblers for other versions of UNIX.
- .P
- The product is thus distributed in source form at present.
- .H 1 "Use"
- The disassembler is run by entering:
- .DS I
- unc mainfile lib1 lib2 ...
- .DE
- .P
- The first named file is the file to be disassembled, which should be
- a single file, either an object module, a (possibly stripped) executable
- file, or a library member. Library members are designated using a
- parenthesis notation, thus:
- .DS I
- unc '/lib/libc.a(printf.o)'
- .DE
- .P
- It is usually necessary to escape the arguments in this case to prevent
- misinterpretation by the shell. Libraries in standard places such as
- .I "/lib"
- and
- .I "/usr/lib"
- may be specified in the same way as to
- .B "ld" ,
- thus
- .DS I
- unc '-lc(printf.o)'
- unc '-lcurses(wmove.o)'
- .DE
- .P
- As an additional facility, the list of directories searched for
- libraries may be varied by setting the environment variable
- .B "LDPATH" ,
- which is interpreted similarly to the shell
- .B "PATH"
- variable, and of course defaults to
- .DS I
- LDPATH=/lib:/usr/lib
- .DE
- .P
- As a further facility, the insertion of
- .B "lib"
- before and
- .B ".a"
- after the argument may be suppressed by using a capital
- .B "-L"
- argument, thus to print out the assembler for
- .I "/lib/crt0.o" ,
- then the command
- .DS I
- unc -Lcrt0.o
- .DE
- .P
- should have the desired effect.
- .P
- Second and subsequent file arguments are only referenced for stripped
- executable files, and may consist of single object files and library
- members, using the same syntax as before, or whole libraries of object
- files, thus:
- .DS I
- unc strippedfile -Lcrt0.o -lcurses -ltermcap '-lm(sqrt.o)' -lc
- .DE
- .P
- It is advisable to make some effort to put the libraries to be searched
- in the order in which they were originally loaded. This is because the
- search for each module starts where the previously matched module ended.
- However, no harm is done if this rule is not adhered to apart from
- increased execution time except in the rare cases where the disassembler
- is confused by object modules which are very nearly similar.
- .H 1 "Additional options"
- The following options are available to modify the behaviour of the
- disassembler.
- .VL 15 2
- .LI "-o file"
- Causes output to be sent to the specified file instead of the standard
- output.
- .LI "-t prefix"
- Causes temporary files to be created with the given prefix. The default
- prefix is
- .B "split" ,
- thus causing two temporary files to be created with this prefix in the
- current directory. If it is desired, for example, to create the files as
- .B "/tmp/xx*" ,
- then the argument
- .B "-t /tmp/xx"
- should be given. Note that the temporary files may be very large as a
- complete map of the text and data segments is generated.
- .LI "-a"
- Suppresses the generation of non-global absolute symbols from the
- output. This saves output from C compilations without any obvious
- problems, but the symbols are by default included in the name of
- producing as nearly identical output as possible to the original source.
- .LI "-s"
- Causes an additional scan to take place where all possible labels are
- replaced by local symbols. The local symbols are inserted in strictly
- ascending order, starting at 1.
- .LI "-v"
- Causes a blow-by-blow account of activities to be output on the standard
- error.
- .LE
- .H 1 "Diagnostics etc"
- Truncated or garbled object and library files usually cause processing
- to stop with an explanatory message.
- .P
- The only other kinds of message are some passing warnings concerning
- obscure constructs not handled, such as the relocation of byte fields,
- or the relocation of overlapping fields. Occasionally a message
- .DS I
- Library clash: message
- .DE
- .P
- may appear and processing cease. This message is found where at a late
- stage in processing libraries, the program discovers that due to the
- extreme similarity of two or more library members, it has come to the
- wrong conclusion about which one to use. The remedy here is to spell out
- to the program which members to take in which order.
- .H 1 "Future development"
- In the future it is hoped to devise ways of making the disassembler
- independent of all the above-mentioned version dependencies, by first
- reading files defining these things. This will probably be applied
- after the Common Object Format becomes more standard.
- .P
- In the long term it would be desirable and useful to enhance the product
- to produce compilable C in addition to assemblable assembler. Stages in
- the process are seen as follows:
- .AL
- .LI
- Better identification of basic blocks in the code. Switch statements are
- a major problem here, as are constant data held in the text segment.
- .LI
- Marrying of data to the corresponding text. It is in various places hard
- to divorce static references "on the fly" (e.g. strings, and switch
- lists in some implementations) from static at the head of a module. This
- is part of the problem of identifying basic blocks.
- .LI
- Compilation of header files to work out structure references within the
- text. At this stage some interaction may be needed.
- .LE
- .P
- Meanwhile the product is one which is a useful tool to the author in its
- present form. Comments and suggestions as to the most practical method
- of improving the product in the ways suggested or in other ways would be
- gratefully considered.
- SHAR_EOF
- if test 6445 -ne "`wc -c < 'doc'`"
- then
- echo shar: error transmitting "'doc'" '(should have been 6445 characters)'
- fi
- fi
- echo shar: extracting "'doc.out'" '(7415 characters)'
- if test -f 'doc.out'
- then
- echo shar: will not over-write existing file "'doc.out'"
- else
- cat << \SHAR_EOF > 'doc.out'
-
-
-
- A Disassembler
-
-
-
- 1. Introduction
-
- This document describes the first release of a disassembler
- for UNIX executable files. The key features are:
-
- 1. For object files the output can be assembled to
- generate the same object module, (apart from minor
- variations in symbol table ordering) as the input.
-
- 2. For stripped executable files object modules and
- libraries may be scanned, modules in the main input
- identified and the appropriate names automatically
- inserted into the output.
-
- 3. An option is available to convert most non-global
- names into local symbols, which cuts down the symbols
- in the generated assembler file.
-
- 4. The disassembler copes reasonably with modules merged
- with the -r option to ld, generating a warning message
- as to the number of modules involved.
-
- At present this is available for certain Motorola 68000
- ports of UNIX System III and System V. Dependencies on
-
- a. Instruction set.
-
- b. Object module format.
-
- c. Library module format.
-
- d. Assembler output format.
-
- are hopefully sufficiently localised to make the product
- useful as a basis for other disassemblers for other versions
- of UNIX.
-
- The product is thus distributed in source form at present.
-
-
- 2. Use
-
- The disassembler is run by entering:
-
- unc mainfile lib1 lib2 ...
-
- The first named file is the file to be disassembled, which
- should be a single file, either an object module, a
- (possibly stripped) executable file, or a library member.
- Library members are designated using a parenthesis notation,
- thus:
-
-
-
- Page 1
-
-
-
-
-
-
-
- A Disassembler
-
-
-
- unc '/lib/libc.a(printf.o)'
-
- It is usually necessary to escape the arguments in this case
- to prevent misinterpretation by the shell. Libraries in
- standard places such as /lib and /usr/lib may be specified
- in the same way as to ld, thus
-
- unc '-lc(printf.o)'
- unc '-lcurses(wmove.o)'
-
- As an additional facility, the list of directories searched
- for libraries may be varied by setting the environment
- variable LDPATH, which is interpreted similarly to the shell
- PATH variable, and of course defaults to
-
- LDPATH=/lib:/usr/lib
-
- As a further facility, the insertion of lib before and .a
- after the argument may be suppressed by using a capital -L
- argument, thus to print out the assembler for /lib/crt0.o,
- then the command
-
- unc -Lcrt0.o
-
- should have the desired effect.
-
- Second and subsequent file arguments are only referenced for
- stripped executable files, and may consist of single object
- files and library members, using the same syntax as before,
- or whole libraries of object files, thus:
-
- unc strippedfile -Lcrt0.o -lcurses -ltermcap '-lm(sqrt.o)' -lc
-
- It is advisable to make some effort to put the libraries to
- be searched in the order in which they were originally
- loaded. This is because the search for each module starts
- where the previously matched module ended. However, no harm
- is done if this rule is not adhered to apart from increased
- execution time except in the rare cases where the
- disassembler is confused by object modules which are very
- nearly similar.
-
-
- 3. Additional options
-
- The following options are available to modify the behaviour
- of the disassembler.
-
- -o file Causes output to be sent to the specified
- file instead of the standard output.
-
-
-
-
- Page 2
-
-
-
-
-
-
-
- A Disassembler
-
-
-
- -t prefix Causes temporary files to be created with the
- given prefix. The default prefix is split,
- thus causing two temporary files to be
- created with this prefix in the current
- directory. If it is desired, for example, to
- create the files as /tmp/xx*, then the
- argument -t /tmp/xx should be given. Note
- that the temporary files may be very large as
- a complete map of the text and data segments
- is generated.
-
- -a Suppresses the generation of non-global
- absolute symbols from the output. This saves
- output from C compilations without any
- obvious problems, but the symbols are by
- default included in the name of producing as
- nearly identical output as possible to the
- original source.
-
- -s Causes an additional scan to take place where
- all possible labels are replaced by local
- symbols. The local symbols are inserted in
- strictly ascending order, starting at 1.
-
- -v Causes a blow-by-blow account of activities
- to be output on the standard error.
-
-
- 4. Diagnostics etc
-
- Truncated or garbled object and library files usually cause
- processing to stop with an explanatory message.
-
- The only other kinds of message are some passing warnings
- concerning obscure constructs not handled, such as the
- relocation of byte fields, or the relocation of overlapping
- fields. Occasionally a message
-
- Library clash: message
-
- may appear and processing cease. This message is found where
- at a late stage in processing libraries, the program
- discovers that due to the extreme similarity of two or more
- library members, it has come to the wrong conclusion about
- which one to use. The remedy here is to spell out to the
- program which members to take in which order.
-
-
-
-
-
-
-
-
- Page 3
-
-
-
-
-
-
-
- A Disassembler
-
-
-
- 5. Future development
-
- In the future it is hoped to devise ways of making the
- disassembler independent of all the above-mentioned version
- dependencies, by first reading files defining these
- things. This will probably be applied after the Common
- Object Format becomes more standard.
-
- In the long term it would be desirable and useful to enhance
- the product to produce compilable C in addition to
- assemblable assembler. Stages in the process are seen as
- follows:
-
- 1. Better identification of basic blocks in the code.
- Switch statements are a major problem here, as are
- constant data held in the text segment.
-
- 2. Marrying of data to the corresponding text. It is in
- various places hard to divorce static references "on
- the fly" (e.g. strings, and switch lists in some
- implementations) from static at the head of a module.
- This is part of the problem of identifying basic
- blocks.
-
- 3. Compilation of header files to work out structure
- references within the text. At this stage some
- interaction may be needed.
-
- Meanwhile the product is one which is a useful tool to the
- author in its present form. Comments and suggestions as to
- the most practical method of improving the product in the
- ways suggested or in other ways would be gratefully
- considered.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Page 4
-
-
-
-
- SHAR_EOF
- if test 7415 -ne "`wc -c < 'doc.out'`"
- then
- echo shar: error transmitting "'doc.out'" '(should have been 7415 characters)'
- fi
- fi
- echo shar: extracting "'makefile'" '(128 characters)'
- if test -f 'makefile'
- then
- echo shar: will not over-write existing file "'makefile'"
- else
- cat << \SHAR_EOF > 'makefile'
- CFLAGS=-v -OB
- OBJS=alloc.o file.o libmtch.o robj.o iset.o prin.o heur.o main.o
-
- unc: $(OBJS) # Link every object into the unc executable
- cc -o unc $(OBJS)
-
- $(OBJS): unc.h # All objects are rebuilt when the shared header changes
- SHAR_EOF
- if test 128 -ne "`wc -c < 'makefile'`"
- then
- echo shar: error transmitting "'makefile'" '(should have been 128 characters)'
- fi
- fi
- echo shar: extracting "'unc.h'" '(4526 characters)'
- if test -f 'unc.h'
- then
- echo shar: will not over-write existing file "'unc.h'"
- else
- cat << \SHAR_EOF > 'unc.h'
- /*
- * SCCS: @(#)unc.h 1.2 11/2/84 14:21:02
- * Header file for uncompile program.
- *
- ***********************************************************************
- * This software is copyright of
- *
- * John M Collins
- * 47 Cedarwood Drive
- * St Albans
- * Herts, AL4 0DN
- * England +44 727 57267
- *
- * and is released into the public domain on the following conditions:
- *
- * 1. No free maintenance will be guaranteed.
- * 2. Nothing may be based on this software without
- * acknowledgement, including incorporation of this
- * notice.
- *
- * Notwithstanding the above, the author welcomes correspondence and bug
- * fixes.
- ***********************************************************************
- */
-
- #define MAXCHARS 50 /* Most characters of a name that shash() sums and lookup() compares */
- #define HASHMOD 97 /* Number of buckets in symbhash; modulus applied by shash() */
-
- /*
- * The following structure is used to keep track of symbols.
- * lookup() allocates each entry with extra room after the struct so
- * that s_name can hold the whole name, and chains entries that hash
- * to the same bucket of symbhash through s_next.
- */
-
- struct symstr {
- struct symstr *s_next; /* Next in hash chain */
- struct symstr *s_link; /* Next in duplicate labels */
- unsigned s_type : 3; /* Symbol type */
- unsigned s_newsym: 1; /* A new symbol (set by lookup() on creation) */
- unsigned s_invent: 1; /* Invented symbol (set by inventsymb()) */
- unsigned s_glob : 1; /* Global symbol */
- long s_value; /* Value if defined */
- short s_defs; /* Defined count */
- short s_used; /* Used count */
- unsigned short s_lsymb; /* Local symbol */
- char s_name[1]; /* Chars of name null term; storage continues past the struct */
- };
-
- typedef struct symstr *symbol; /* Pointer form in which symbols are passed around */
-
- symbol symbhash[HASHMOD]; /* Hash buckets, indexed by the result of shash() */
-
- typedef struct { /* Everything remembered about one file being examined */
- int ef_t; /* Text file fd (gette/putte treat it as a file of t_entry records) */
- int ef_d; /* Data file fd (getde/putde treat it as a file of d_entry records) */
- long ef_entry; /* Entry point */
- long ef_tsize; /* Text size */
- long ef_dsize; /* Data size */
- long ef_bsize; /* Bss size */
- long ef_end; /* End of it all (highest address getde/putde accept) */
- long ef_tbase; /* Text base */
- long ef_dbase; /* Data base */
- long ef_bbase; /* Bss base */
- int ef_stcnt; /* Number of symbols */
- int ef_stmax; /* Max number of symbols (capacity of ef_stvec, grown by reallst()) */
- symbol *ef_stvec; /* Symbol vector */
- } ef_fids;
-
- typedef ef_fids *ef_fid; /* Pointer form taken by the file access routines */
-
- /*
- * Description of word in text file. This entry is held in the place
- * corresponding to the address in the text file.
- * gette() and putte() locate the entry for an address by seeking to
- * (address - ef_tbase) * sizeof(t_entry) / 2, i.e. one entry for each
- * two-byte word, and reject odd addresses.
- */
-
- typedef struct {
- unsigned short t_contents; /* Actual contents */
- unsigned short t_iindex; /* Index in table */
- unsigned t_type : 2; /* Type */
- unsigned t_vins : 1; /* Valid instruction */
- unsigned t_bdest : 1; /* Is branch dest */
- unsigned t_gbdest: 1; /* Is global dest */
- unsigned t_dref : 1; /* Referred to in data */
- unsigned t_bchtyp: 2; /* Branch type */
- unsigned t_lng : 3; /* Length in words */
- unsigned t_reloc : 2; /* Relocatable */
- unsigned t_rptr : 2; /* Where relocated */
- unsigned t_rdisp : 1; /* Relocatable displacement */
- unsigned t_isrel : 1; /* Relocated */
- unsigned t_amap : 1; /* Worked out */
- symbol t_relsymb; /* Relocation symbol */
- long t_reldisp; /* Offset + or - from symb */
- symbol t_lab; /* Label */
- unsigned short t_lsymb; /* Local symbol */
- long t_reflo; /* Lowest place referred */
- long t_refhi; /* Highest place referred */
- unsigned short t_match; /* Lib match lng */
- } t_entry;
-
- /*
- * Types ......
- * The T_ values are what t_type is compared against (textlab() refuses
- * to label a T_CONT entry).  The R_ values select an operand size in
- * getdw()/gettw() and are what d_reloc is compared against in setde().
- */
-
- #define T_UNKNOWN 0
- #define T_BEGIN 1
- #define T_CONT 2
-
- #define R_NONE 0 /* No relocation */
- #define R_BYTE 1 /* Byte relocation */
- #define R_WORD 2 /* Word relocation */
- #define R_LONG 3 /* Long relocation */
-
- /*
- * Branch types.
- * NOTE(review): presumably the values held in t_bchtyp - confirm
- * against the instruction decoding code, which is not in this part.
- */
-
- #define T_NOBR 0
- #define T_CONDBR 1
- #define T_UNBR 2
- #define T_JSR 3
-
- typedef struct { /* One record per byte address; getde()/putde() seek to (address - ef_dbase) * sizeof(d_entry) */
- unsigned char d_contents; /* Actual contents */
- unsigned d_type : 4; /* Data type */
- unsigned d_reloc : 2; /* Relocatable */
- unsigned d_rptr : 2; /* Where relocated */
- short d_lng; /* Length -ve for D_CONT */
- symbol d_relsymb; /* Relocation symbol */
- long d_reldisp; /* Offset + or - from symb */
- symbol d_lab; /* Label */
- } d_entry;
-
- /*
- * Data types.
- * Stored in d_type.  mkdref() compares these numerically when it
- * narrows an entry to a smaller access size, so the relative order of
- * D_BYTE, D_WORD and D_LONG matters.
- */
-
- #define D_ASC 0 /* Ascii chars */
- #define D_ASCZ 1 /* Null-term ascii */
- #define D_BYTE 2 /* Decimal bytes */
- #define D_WORD 3 /* Words */
- #define D_LONG 4 /* Longs */
- #define D_ADDR 5 /* Address pointer */
- #define D_CONT 6 /* Continuation of last */
-
- /*
- * 'Common' items.
- * A growable array of symbols: addit() appends to c_symb and enlarges
- * it whenever c_int has reached c_max.
- */
-
- struct commit {
- symbol *c_symb; /* List of symbols */
- int c_int; /* Current number */
- int c_max; /* Maximum */
- };
-
- /*
- * Library file description.
- * NOTE(review): the code using this is not in this part of the
- * archive; the field comments are the only description available here.
- */
-
- struct libit {
- int lf_fd; /* File descriptor */
- long lf_offset; /* Offset of current file */
- long lf_next; /* Offset of next file */
- char lf_name[14]; /* Name of item */
- };
- SHAR_EOF
- if test 4526 -ne "`wc -c < 'unc.h'`"
- then
- echo shar: error transmitting "'unc.h'" '(should have been 4526 characters)'
- fi
- fi
- echo shar: extracting "'alloc.c'" '(6396 characters)'
- if test -f 'alloc.c'
- then
- echo shar: will not over-write existing file "'alloc.c'"
- else
- cat << \SHAR_EOF > 'alloc.c'
- /*
- * SCCS: @(#)alloc.c 1.2 11/2/84 14:17:20
- * Allocate space etc.
- *
- ***********************************************************************
- * This software is copyright of
- *
- * John M Collins
- * 47 Cedarwood Drive
- * St Albans
- * Herts, AL4 0DN
- * England +44 727 57267
- *
- * and is released into the public domain on the following conditions:
- *
- * 1. No free maintenance will be guaranteed.
- * 2. Nothing may be based on this software without
- * acknowledgement, including incorporation of this
- * notice.
- *
- * Notwithstanding the above, the author welcomes correspondence and bug
- * fixes.
- ***********************************************************************
- */
-
- #include <stdio.h>
- #include <a.out.h>
- #include <setjmp.h>
- #include "unc.h"
-
- #define STINC 10 /* Entries by which reallst() enlarges a symbol vector */
-
- char *malloc(), *realloc();
- char *strncpy();
- void gette(), getde(), setde(), putte(), putde(); /* Temp-file entry access (file.c) */
- void unimpl(); /* Warning printer (heur.c) */
- long gettw(); /* Text word fetch (file.c) */
-
- ef_fids mainfile; /* File that textlab() and mkdref() operate on; heur.c repeats this definition */
-
- /*
- * Memory is exhausted: say so on the standard error and give up
- * with exit status 255.
- */
-
- void nomem()
- {
-     (void) fputs("Sorry - run out of memory\n", stderr);
-     exit(255);
- }
-
- /*
- * Compute the hash bucket for a symbol name: add up at most the
- * first MAXCHARS characters and reduce the total modulo HASHMOD.
- */
-
- unsigned shash(str)
- register char *str;
- {
-     register unsigned sum = 0;
-     register int i;
-
-     for (i = 0; i < MAXCHARS && str[i] != '\0'; i++)
-         sum += str[i];
-     return sum % HASHMOD;
- }
-
- /*
- * Find the symbol called str in the hash table, creating and chaining
- * a new entry when there is none.  Only the first MAXCHARS characters
- * of a name are significant, and no more than that many are stored.
- * A new entry comes back with s_newsym set and every other field
- * cleared.  Running out of memory ends the program through nomem().
- */
-
- symbol lookup(str)
- char *str;
- {
-     register symbol res, *pp;
-     register int len;
-
-     /* Search the chain, leaving pp at the link a new entry hangs from. */
-     pp = &symbhash[shash(str)];
-     res = *pp;
-     while (res != NULL) {
-         if (strncmp(res->s_name, str, MAXCHARS) == 0)
-             return res;
-         pp = &res->s_next;
-         res = *pp;
-     }
-
-     /* Number of characters to keep, not counting the terminator. */
-     for (len = 0; len < MAXCHARS; len++)
-         if (str[len] == '\0')
-             break;
-
-     /* s_name[1] inside the struct already pays for the terminator. */
-     res = (symbol) malloc(sizeof(struct symstr) + len);
-     if (res == NULL)
-         nomem();
-     *pp = res;
-     res->s_next = NULL;
-     (void) strncpy(res->s_name, str, len);
-     res->s_name[len] = '\0'; /* Null-terminate */
-     res->s_type = 0; /* Callers fill in type and value */
-     res->s_value = 0;
-     res->s_newsym = 1;
-     res->s_glob = 0;
-     res->s_invent = 0;
-     res->s_link = NULL;
-     res->s_used = 0;
-     res->s_defs = 0;
-     res->s_lsymb = 0;
-     return res;
- }
-
- /*
- * Invent a symbol, making sure that we don't know it.
- * Names are the prefix followed by a counter that keeps rising across
- * calls; candidates are tried until lookup() reports a brand new
- * entry, which is then marked as invented rather than new.
- * At most 24 characters of the prefix are used so that the name
- * always fits the local buffer, whatever the counter has reached.
- */
-
- symbol inventsymb(prefix)
- char *prefix;
- {
-     static int nsymb = 0;
-     char schars[48]; /* 24 prefix chars + any int in decimal + null */
-     register symbol res;
-
-     do
-         (void) sprintf(schars, "%.24s%d", prefix, ++nsymb);
-     while (!(res = lookup(schars))->s_newsym);
-
-     res->s_newsym = 0;
-     res->s_invent = 1;
-     return res;
- }
-
- /*
- * Make room for STINC more entries in the symbol vector of outf,
- * allocating the vector on first use.  Does not return if memory
- * cannot be had.
- */
-
- void reallst(outf)
- register ef_fid outf;
- {
-     register symbol *vec = outf->ef_stvec;
-
-     outf->ef_stmax += STINC;
-     vec = (vec == NULL)
-         ? (symbol *) malloc(outf->ef_stmax * sizeof(symbol))
-         : (symbol *) realloc(vec, outf->ef_stmax * sizeof(symbol));
-     outf->ef_stvec = vec;
-     if (vec == NULL)
-         nomem();
- }
-
- /*
- * Return the symbol of file fid found at position pos of relocation
- * segment seg.  A symbol already in the file's symbol vector is used
- * when one matches; otherwise an "RS" symbol is invented, appended to
- * the vector and attached as the label of the text or data entry at
- * pos.
- */
-
- symbol getnsymb(fid, seg, pos)
- register ef_fid fid;
- unsigned seg;
- long pos;
- {
-     register symbol sym;
-     register int n;
-
-     /*
-     * MACHINE DEPENDENT: relocation segment codes (the argument) and
-     * symbol type codes (as kept in the symbol table) are numbered
-     * differently, so move from the one to the other.
-     */
-     seg += TEXT - RTEXT;
-
-     /* An existing symbol at this place wins. */
-     for (n = 0; n < fid->ef_stcnt; n++) {
-         sym = fid->ef_stvec[n];
-         if (sym->s_value == pos && sym->s_type == seg)
-             return sym;
-     }
-
-     /* None there: make one up and record it in the vector. */
-     sym = inventsymb("RS");
-     if (fid->ef_stcnt >= fid->ef_stmax)
-         reallst(fid);
-     fid->ef_stvec[fid->ef_stcnt] = sym;
-     fid->ef_stcnt++;
-     sym->s_type = seg;
-     sym->s_value = pos;
-
-     /* Hang the new symbol on the entry it names. */
-     if (seg == TEXT) {
-         t_entry te;
-
-         gette(fid, pos, &te);
-         te.t_bdest = 1;
-         te.t_lab = sym;
-         putte(fid, pos, &te);
-         return sym;
-     }
-     if (seg == DATA || seg == BSS) {
-         d_entry de;
-
-         getde(fid, pos, &de);
-         de.d_lab = sym;
-         putde(fid, pos, &de);
-     }
-     return sym;
- }
-
- /*
- * Give back the label of text address loc in the main file, inventing
- * a "TS" label when the address has none yet, and widen the recorded
- * range of places referring to it so that it includes refpos.
- * Returns NULL when loc is a continuation word (T_CONT).
- */
-
- symbol textlab(loc, refpos)
- long loc, refpos;
- {
-     t_entry te;
-     symbol lab;
-
-     gette(&mainfile, loc, &te);
-     if (te.t_type == T_CONT)
-         return NULL;
-
-     lab = te.t_lab;
-     if (lab != NULL)
-         lab->s_used++;
-     else {
-         lab = inventsymb("TS");
-         lab->s_type = TEXT;
-         lab->s_value = loc;
-         te.t_lab = lab;
-         te.t_bdest = 1;
-         putte(&mainfile, loc, &te);
-     }
-
-     /* Stretch the reference range upwards, then downwards. */
-     if (te.t_refhi < refpos) {
-         te.t_refhi = refpos;
-         putte(&mainfile, loc, &te);
-     }
-     if (te.t_reflo > refpos) {
-         te.t_reflo = refpos;
-         putte(&mainfile, loc, &te);
-     }
-     return lab;
- }
-
- /*
- * Note references to data.
- * tpos is the text address of a long operand and size the width in
- * bytes of the access made through it.  If the operand has no
- * relocation symbol yet and its value lies between ef_dbase and ef_end
- * of the main file, the data entry it points at is given a label
- * (invented if necessary), its type is adjusted to the access width,
- * and that label is recorded as the relocation symbol of the text
- * entry.
- */
-
- void mkdref(tpos, size)
- long tpos;
- unsigned size;
- {
- t_entry tent;
- d_entry dent;
- register symbol ds;
- int dchng = 0; /* Non-zero once dent needs writing back */
- int wsize; /* D_ type matching size */
- long dpos; /* Address the operand refers to */
-
- gette(&mainfile, tpos, &tent);
- if (tent.t_relsymb != NULL) /* Already has a relocation symbol */
- return;
-
- dpos = gettw(&mainfile, tpos, R_LONG);
- if (dpos < mainfile.ef_dbase || dpos > mainfile.ef_end) /* Not a data or bss address */
- return;
-
- switch (size) { /* Anything but 2 or 4 counts as a byte access */
- default:
- wsize = D_BYTE;
- break;
- case 2:
- wsize = D_WORD;
- break;
- case 4:
- wsize = D_LONG;
- break;
- }
-
- getde(&mainfile, dpos, &dent);
- if ((ds = dent.d_lab) == NULL) { /* Unlabelled: invent a label by segment */
- if (dpos >= mainfile.ef_bbase) {
- ds = inventsymb("BS");
- ds->s_type = BSS;
- }
- else {
- ds = inventsymb("DS");
- ds->s_type = DATA;
- }
- ds->s_value = dpos;
- dent.d_lab = ds;
- dchng++;
- }
- else
- ds->s_used++; /* Existing label gains another use */
-
- if (dent.d_type != D_BYTE) {
- if (dent.d_type != wsize) {
- if (dent.d_type == D_ADDR) { /* Address entries are kept as they are */
- if (wsize != D_LONG)
- unimpl("Addr word usage");
- }
- else if (dent.d_type > wsize) { /* Numerically larger type: narrow to this access */
- dchng++;
- dent.d_type = wsize;
- dent.d_lng = size;
- }
- }
- }
- else { /* Entry was D_BYTE: take the width of this access */
- dent.d_type = wsize;
- dent.d_lng = size;
- dchng++;
- }
- if (dchng) {
- putde(&mainfile, dpos, &dent);
- for (dchng = 1; dchng < size; dchng++) /* dchng reused as byte offset */
- setde(&mainfile, dpos+dchng, D_CONT, 1); /* Remaining bytes continue this item */
- }
-
- tent.t_relsymb = ds;
- putte(&mainfile, tpos, &tent);
- }
-
- /*
- * Add item to common or abs list.
- * The list grows by COMINC entries whenever it is full; the array is
- * always sized from c_max, both when it is first allocated and when
- * it is enlarged.  Running out of memory ends the program through
- * nomem().
- */
-
- #define COMINC 10
-
- void addit(cp, symb)
- register struct commit *cp;
- symbol symb;
- {
-     if (cp->c_int >= cp->c_max) {
-         cp->c_max += COMINC;
-         if (cp->c_symb == NULL)
-             cp->c_symb = (symbol *)
-                 malloc(cp->c_max * sizeof(symbol));
-         else
-             cp->c_symb = (symbol *)
-                 realloc(cp->c_symb,
-                     cp->c_max * sizeof(symbol));
-         if (cp->c_symb == NULL)
-             nomem();
-     }
-     cp->c_symb[cp->c_int++] = symb;
- }
- SHAR_EOF
- if test 6396 -ne "`wc -c < 'alloc.c'`"
- then
- echo shar: error transmitting "'alloc.c'" '(should have been 6396 characters)'
- fi
- fi
- echo shar: extracting "'file.c'" '(4184 characters)'
- if test -f 'file.c'
- then
- echo shar: will not over-write existing file "'file.c'"
- else
- cat << \SHAR_EOF > 'file.c'
- /*
- * SCCS: @(#)file.c 1.2 11/2/84 14:17:35
- * Various operations on files.
- *
- ***********************************************************************
- * This software is copyright of
- *
- * John M Collins
- * 47 Cedarwood Drive
- * St Albans
- * Herts, AL4 0DN
- * England +44 727 57267
- *
- * and is released into the public domain on the following conditions:
- *
- * 1. No free maintenance will be guaranteed.
- * 2. Nothing may be based on this software without
- * acknowledgement, including incorporation of this
- * notice.
- *
- * Notwithstanding the above, the author welcomes correspondence and bug
- * fixes.
- ***********************************************************************
- */
-
- #include <stdio.h>
- #include <a.out.h>
- #include "unc.h"
-
- long lseek();
- void unimpl();
-
- /*
- * Fetch into *te the text entry of file fid for address addr.
- * The address must be even and must not lie below the text base or
- * beyond the text size; a bad address ends the program with status
- * 200 and a failed read with status 201.  Messages show the address
- * relative to the text base.
- */
-
- void gette(fid, addr, te)
- register ef_fid fid;
- register long addr;
- t_entry *te;
- {
-     register long rel = addr - fid->ef_tbase;
-
-     if ((rel & 1) != 0 || rel < 0 || rel > fid->ef_tsize) {
-         (void) fprintf(stderr, "Invalid text address %lx\n", rel);
-         exit(200);
-     }
-
-     /* One t_entry is kept for each two-byte word. */
-     (void) lseek(fid->ef_t, (long)(rel * sizeof(t_entry)/2), 0);
-     if (read(fid->ef_t, (char *) te, sizeof(t_entry)) == sizeof(t_entry))
-         return;
-
-     (void) fprintf(stderr, "Trouble reading text at %lx\n", rel);
-     exit(201);
- }
-
- /*
- * Store a text entry.
- * Writes *te as the entry of file fid for address addr, which is
- * validated exactly as in gette().  A bad address ends the program
- * with status 200; a failed or short write ends it with status 201
- * instead of leaving a damaged temporary file behind.
- */
-
- void putte(fid, addr, te)
- register ef_fid fid;
- register long addr;
- t_entry *te;
- {
-     addr -= fid->ef_tbase;
-     if (addr < 0 || addr > fid->ef_tsize || (addr & 1) != 0) {
-         (void) fprintf(stderr, "Invalid text address %lx\n", addr);
-         exit(200);
-     }
-     (void) lseek(fid->ef_t, (long)(addr * sizeof(t_entry)/2), 0);
-     if (write(fid->ef_t, (char *) te, sizeof(t_entry)) != sizeof(t_entry)) {
-         (void) fprintf(stderr, "Trouble writing text at %lx\n", addr);
-         exit(201);
-     }
- }
-
- /*
- * Fetch into *de the data entry of file fid for address addr, which
- * must lie between the data base and the end of the file's address
- * range.  A bad address ends the program with status 200 and a
- * failed read with status 201; the read failure message shows the
- * address relative to the data base.
- */
-
- void getde(fid, addr, de)
- register ef_fid fid;
- register long addr;
- d_entry *de;
- {
-     register long rel;
-
-     if (addr > fid->ef_end || addr < fid->ef_dbase) {
-         (void) fprintf(stderr, "Invalid data address %lx\n", addr);
-         exit(200);
-     }
-
-     /* One d_entry is kept for each byte. */
-     rel = addr - fid->ef_dbase;
-     (void) lseek(fid->ef_d, (long)(rel * sizeof(d_entry)), 0);
-     if (read(fid->ef_d, (char *) de, sizeof(d_entry)) == sizeof(d_entry))
-         return;
-
-     (void) fprintf(stderr, "Trouble reading data at %lx\n", rel);
-     exit(201);
- }
-
- /*
- * Store a data entry.
- * Writes *de as the entry of file fid for address addr, which is
- * validated exactly as in getde().  A bad address ends the program
- * with status 200; a failed or short write ends it with status 201
- * instead of leaving a damaged temporary file behind.
- */
-
- void putde(fid, addr, de)
- register ef_fid fid;
- register long addr;
- d_entry *de;
- {
-     if (addr < fid->ef_dbase || addr > fid->ef_end) {
-         (void) fprintf(stderr, "Invalid data address %lx\n", addr);
-         exit(200);
-     }
-     addr -= fid->ef_dbase;
-     (void) lseek(fid->ef_d, (long)(addr * sizeof(d_entry)), 0);
-     if (write(fid->ef_d, (char *) de, sizeof(d_entry)) != sizeof(d_entry)) {
-         (void) fprintf(stderr, "Trouble writing data at %lx\n", addr);
-         exit(201);
-     }
- }
-
- /*
- * Set type and length of given data entry.
- * Addresses beyond the end of the file's range are ignored.  Marking
- * an entry that carries relocation as a continuation (D_CONT) draws
- * an "overlapped reloc" warning through unimpl(), after which the
- * entry is updated all the same.
- */
-
- void setde(fid, addr, type, lng)
- ef_fid fid;
- long addr;
- unsigned type;
- int lng;
- {
-     d_entry dat;
-
-     if (addr > fid->ef_end)
-         return;
-     getde(fid, addr, &dat);
-     if (type == D_CONT && dat.d_reloc != R_NONE) {
-         char obuf[40]; /* Text + 16 hex digits of a long + null */
-
-         /* addr is a long, so it has to go through %lx. */
-         (void) sprintf(obuf, "overlapped reloc 0x%lx", addr);
-         unimpl(obuf);
-     }
-     dat.d_type = type;
-     dat.d_lng = lng;
-     putde(fid, addr, &dat);
- }
-
- /*
- * Read a value from the data file of fid starting at address pos.
- * size is R_BYTE, R_WORD or R_LONG for one, two or four bytes, which
- * are combined with the byte at the lowest address most significant.
- * Any other size ends the program with status 20.
- */
-
- long getdw(fid, pos, size)
- register ef_fid fid;
- long pos;
- int size;
- {
-     d_entry dat;
-     register long val;
-     register int k, nbytes;
-
-     getde(fid, pos, &dat);
-
-     if (size == R_BYTE)
-         return dat.d_contents;
-
-     if (size == R_WORD)
-         nbytes = 2;
-     else if (size == R_LONG)
-         nbytes = 4;
-     else {
-         (void) fprintf(stderr, "Data word size error\n");
-         exit(20);
-     }
-
-     /* Shift in the following bytes one at a time. */
-     val = dat.d_contents;
-     for (k = 1; k < nbytes; k++) {
-         getde(fid, pos+k, &dat);
-         val = (val << 8) + dat.d_contents;
-     }
-     return val;
- }
-
- /*
- * Read a value from the text file of fid at address pos.
- * R_BYTE gives the upper eight bits of the word there, R_WORD the
- * whole word, and R_LONG that word as the upper half with the word
- * at pos+2 as the lower half.  Any other size ends the program with
- * status 20.
- */
-
- long gettw(fid, pos, size)
- register ef_fid fid;
- long pos;
- int size;
- {
-     t_entry tex;
-     long hi;
-
-     gette(fid, pos, &tex);
-
-     if (size == R_BYTE)
-         return tex.t_contents >> 8;
-     if (size == R_WORD)
-         return tex.t_contents;
-     if (size != R_LONG) {
-         (void) fprintf(stderr, "Text word size error\n");
-         exit(20);
-     }
-
-     hi = tex.t_contents;
-     gette(fid, pos+2, &tex);
-     return (hi << 16) + tex.t_contents;
- }
- SHAR_EOF
- if test 4184 -ne "`wc -c < 'file.c'`"
- then
- echo shar: error transmitting "'file.c'" '(should have been 4184 characters)'
- fi
- fi
- echo shar: extracting "'heur.c'" '(9885 characters)'
- if test -f 'heur.c'
- then
- echo shar: will not over-write existing file "'heur.c'"
- else
- cat << \SHAR_EOF > 'heur.c'
- /*
- * SCCS: @(#)heur.c 1.2 11/2/84 14:17:46
- * Attempt to guess things about the file.
- *
- ***********************************************************************
- * This software is copyright of
- *
- * John M Collins
- * 47 Cedarwood Drive
- * St Albans
- * Herts, AL4 0DN
- * England +44 727 57267
- *
- * and is released into the public domain on the following conditions:
- *
- * 1. No free maintenance will be guaranteed.
- * 2. Nothing may be based on this software without
- * acknowledgement, including incorporation of this
- * notice.
- *
- * Notwithstanding the above, the author welcomes correspondence and bug
- * fixes.
- ***********************************************************************
- */
-
- #include <stdio.h>
- #include <a.out.h>
- #include "unc.h"
-
- #define INITDAT 256
- #define INCDAT 128
-
- #define STRSCNT 3
- #define STRECNT 3
-
- char *malloc(), *realloc();
-
- void gette(), getde(), setde(), putte(), putde();
- void nomem();
- long getdw();
- symbol inventsymb();
-
- long endt;
- ef_fids mainfile;
-
/*
 * Warn on stderr that the handling of something has not been
 * implemented.  msg names the thing concerned and is quoted in the
 * warning.
 */

void unimpl(msg)
char *msg;
{
    (void) fprintf(stderr,
                   "Warning: handling of \"%s\" not implemented\n",
                   msg);
}
-
/*
 * Decide whether x could be a character of a text string: newline,
 * tab, or anything from space to tilde inclusive.  Returns 1 if so,
 * otherwise 0.
 */

int possstr(x)
unsigned x;
{
    switch (x) {
    case '\n':
    case '\t':
        return 1;
    default:
        break;
    }
    return (x < ' ' || x > '~') ? 0 : 1;
}
-
- /*
- * Guess things about data files.
- */
-
- void intudat(fid)
- ef_fid fid;
- {
- register int i, j;
- int lt, input, invcnt;
- long offs, soffs, endd;
- d_entry fdat;
- unsigned char *inbuf;
- int ibsize;
-
- inbuf = (unsigned char *)malloc(INITDAT);
- if (inbuf == NULL)
- nomem();
- ibsize = INITDAT;
-
- offs = fid->ef_dbase;
- endd = fid->ef_bbase;
-
- while (offs < endd) {
- getde(fid, offs, &fdat);
- if (fdat.d_type != D_BYTE) {
- offs += fdat.d_lng;
- continue;
- }
-
- /*
- * Looks like general data. Read in as much as possible.
- */
-
- input = 0;
- soffs = offs;
- do {
- if (input >= ibsize) {
- ibsize += INCDAT;
- inbuf = (unsigned char *)
- realloc((char *)inbuf, (unsigned)ibsize);
- if (inbuf == NULL)
- nomem();
- }
- inbuf[input++] = fdat.d_contents;
- offs++;
- if (offs >= endd)
- break;
- getde(fid, offs, &fdat);
- } while (fdat.d_type == D_BYTE && fdat.d_lab == NULL);
-
- /*
- * Now split up the data.
- */
-
- for (i = 0; i < input; ) {
-
- /*
- * Might be a string.
- */
-
- if (possstr(inbuf[i])) {
- lt = input;
- if (i + STRSCNT < lt)
- lt = i + STRSCNT;
- for (j = i + 1; j < lt; j++) {
- if (inbuf[j] == '\0')
- break;
- if (!possstr(inbuf[j]))
- goto notstr;
- }
-
- /*
- * Looks like a string then.
- */
-
- invcnt = 0;
- for (j = i + 1; j < input; j++) {
- if (inbuf[j] == '\0') {
- j++;
- break;
- }
- if (possstr(inbuf[j]))
- invcnt = 0;
- else {
- invcnt++;
- if (invcnt >= STRECNT) {
- j -= invcnt - 1;
- break;
- }
- }
- }
-
- setde(fid,
- soffs+i,
- (unsigned)(inbuf[j-1]=='\0'?D_ASCZ:D_ASC),
- j - i);
- for (i++; i < j; i++)
- setde(fid, soffs+i, D_CONT, 1);
- continue;
- }
-
- notstr:
- /*
- * If on odd boundary, treat as a byte.
- */
-
- if ((soffs + i) & 1 || i + 1 >= input) {
- setde(fid, soffs + i, D_BYTE, 1);
- i++;
- continue;
- }
-
- /*
- * Treat as longs unless not enough.
- */
-
- if (i + 3 >= input) {
- setde(fid, soffs + i, D_WORD, 2);
- setde(fid, soffs + i + 1, D_CONT, -1);
- i += 2;
- continue;
- }
-
- /*
- * Treat as a long but mark changable.
- */
-
- setde(fid, soffs + i, D_LONG, 4);
- for (j = 1; j < 4; j++)
- setde(fid, soffs + i + j, D_CONT, -j);
- i += 4;
- }
- }
- free((char *)inbuf);
-
- /*
- * Now zap bss segment.
- */
-
- offs = fid->ef_bbase;
- endd = fid->ef_end;
-
- while (offs < endd) {
- getde(fid, offs, &fdat);
- if (fdat.d_type != D_BYTE) {
- offs += fdat.d_lng;
- continue;
- }
-
- soffs = offs;
- do {
- offs++;
- if (offs >= endd)
- break;
- getde(fid, offs, &fdat);
- } while (fdat.d_type == D_BYTE && fdat.d_lab == NULL);
-
- setde(fid, soffs, D_BYTE, (int)(offs-soffs));
- for (i = -1, soffs++; soffs < offs; i--, soffs++)
- setde(fid, soffs, D_CONT, i);
- }
- }
-
- /*
- * For non relocatable files, try to identify address pointers in
- * the data.
- */
-
- void inturdat(fid)
- ef_fid fid;
- {
- register long offs = fid->ef_dbase;
- register int i;
- register symbol ds;
- long endd = fid->ef_bbase;
- long cont;
- d_entry dent, refdent;
-
- while (offs < endd) {
- getde(fid, offs, &dent);
- if (dent.d_type != D_LONG)
- goto endit;
- cont = getdw(fid, offs, R_LONG);
- if (cont < fid->ef_dbase || cont > fid->ef_end)
- goto endit;
- getde(fid, cont, &refdent);
- if (refdent.d_type == D_CONT) {
- d_entry pdent;
- int siz;
-
- if (refdent.d_lng >= 0)
- goto endit;
- getde(fid, cont+refdent.d_lng, &pdent);
- i = -refdent.d_lng;
- refdent.d_lng += pdent.d_lng;
- pdent.d_lng = i;
- if (pdent.d_type == D_LONG && i == 2)
- siz = D_WORD;
- else
- siz = D_BYTE;
- refdent.d_type = siz;
- pdent.d_type = siz;
- putde(fid, cont - i, &pdent);
- for (i = 1; i < refdent.d_lng; i++)
- setde(fid, cont+i, D_CONT, -i);
- }
- if ((ds = refdent.d_lab) == NULL) {
- if (cont >= fid->ef_bbase) {
- ds = inventsymb("BS");
- ds->s_type = BSS;
- }
- else {
- ds = inventsymb("DS");
- ds->s_type = DATA;
- }
- ds->s_value = cont;
- refdent.d_lab = ds;
- putde(fid, cont, &refdent);
- }
- else
- ds->s_used++;
- dent.d_type = D_ADDR;
- dent.d_relsymb = ds;
- dent.d_rptr = ds->s_type;
- putde(fid, offs, &dent);
- for (i = 1; i < 4; i++)
- setde(fid, offs+i, D_CONT, 1);
- endit:
- offs += dent.d_lng;
- }
- }
-
- /*
- * Recursively follow through the code, stopping at unconditional
- * branches and invalid instructions.
- */
-
- void follseq(pos)
- long pos;
- {
- t_entry tent;
- int lng;
- long npos;
-
- while (pos < endt) {
- gette(&mainfile, pos, &tent);
- if (tent.t_amap) /* Been here */
- return;
- tent.t_amap = 1;
- lng = findinst(&tent, pos);
- npos = pos + lng*2;
- if (npos > endt) {
- tent.t_vins = 0;
- tent.t_lng = 1;
- tent.t_type = T_UNKNOWN;
- lng = 0;
- npos = endt;
- }
- putte(&mainfile, pos, &tent);
- pos = npos;
-
- if (lng <= 0)
- return;
-
- switch (tent.t_bchtyp) {
- case T_UNBR:
- if (tent.t_relsymb == NULL)
- return;
- pos = tent.t_relsymb->s_value;
- continue;
- case T_JSR:
- if (tent.t_relsymb != NULL)
- follseq(tent.t_relsymb->s_value);
- continue;
- case T_CONDBR:
- follseq(tent.t_relsymb->s_value);
- default:
- continue;
- }
- }
- }
-
-
- /*
- * Try to work out things about text files.
- */
-
- void intutext()
- {
- long pos;
- t_entry tent;
- int lng;
-
- endt = mainfile.ef_tbase + mainfile.ef_tsize;
- pos = mainfile.ef_entry;
- nextv:
- for (; pos < endt;) {
- gette(&mainfile, pos, &tent);
- if (!tent.t_amap && tent.t_vins) {
- follseq(pos);
- pos += 2;
- goto nextiv;
- }
- pos += tent.t_lng * 2;
- if (tent.t_bchtyp == T_UNBR)
- goto nextiv;
- }
- goto dorest;
- nextiv:
- for (; pos < endt; pos += 2) {
- gette(&mainfile, pos, &tent);
- if (tent.t_bdest)
- goto nextv;
- }
- dorest:
- /*
- * Deal with unmapped instructions.
- */
-
- for (pos = 0; pos < endt;) {
- gette(&mainfile, pos, &tent);
- switch (tent.t_type) {
- case T_BEGIN:
- pos += tent.t_lng * 2;
- continue;
- case T_UNKNOWN:
- if (tent.t_vins) {
- lng = findinst(&tent, pos);
- putte(&mainfile, pos, &tent);
- if (lng > 0) {
- pos += lng * 2;
- continue;
- }
- }
- default:
- pos += 2;
- continue;
- }
- }
- }
-
- /*
- * Invent local symbols.
- */
-
- void intlsym()
- {
- long bpos, epos, hiref, hipos;
- unsigned llnum;
- t_entry tent;
- register symbol tl;
-
- endt = mainfile.ef_tbase + mainfile.ef_tsize;
- epos = mainfile.ef_entry;
- for (;;) {
- bpos = epos;
- hiref = bpos;
- if (epos >= endt)
- return;
- gette(&mainfile, epos, &tent);
- epos += tent.t_lng * 2;
- for (; epos < endt;) {
- gette(&mainfile, epos, &tent);
- if (tent.t_gbdest || tent.t_dref)
- break;
- if (tent.t_reflo < bpos)
- break;
- if (tent.t_refhi > hiref) {
- hiref = tent.t_refhi;
- hipos = epos;
- }
- epos += tent.t_lng * 2;
- }
- if (hiref > epos)
- epos = hipos;
- llnum = 0;
- for (hipos = bpos; hipos < epos;) {
- gette(&mainfile, hipos, &tent);
- if (!tent.t_gbdest && !tent.t_dref &&
- tent.t_reflo >= bpos && tent.t_refhi < epos &&
- (tl = tent.t_lab) != NULL)
- tl->s_lsymb = ++llnum;
- hipos += tent.t_lng * 2;
- }
- }
- }
-
- /*
- * Given the main file, a possible candidate for matching in the
- * file and an offset, see if text matches. Return 1 if matches,
- * or 0 if no match.
- */
-
- int matchup(mf, lf, startpos)
- register ef_fid mf, lf;
- long startpos;
- {
- register int i, matches = 0;
- t_entry ltent, mtent;
-
- if (lf->ef_tsize > mf->ef_tsize - startpos + mf->ef_tbase)
- return 0; /* At end - can't fit */
-
- for (i = 0; i < lf->ef_tsize; i += 2) {
- gette(lf, lf->ef_tbase + i, <ent);
- if (ltent.t_isrel)
- continue;
- gette(mf, startpos + i, &mtent);
- if (mtent.t_contents != ltent.t_contents)
- return 0;
- matches++;
- }
-
- /*
- * Give up on zero length or all relocatable files.
- */
-
- return matches > 0;
- }
-
- /*
- * Scan through main file looking for a match.
- */
-
- long findstart(mf, lf)
- register ef_fid mf, lf;
- {
- register long res = mf->ef_tbase;
- long lim = mf->ef_tbase + mf->ef_tsize - lf->ef_tsize;
- t_entry tent;
-
- restart:
- for (; res <= lim; res += 2) {
- gette(mf, res, &tent);
- if (tent.t_match != 0) {
- res += tent.t_match;
- goto restart;
- }
- if (matchup(mf, lf, res))
- return res;
- }
- return -1;
- }
-
- /*
- * Mark the head of a matched module to save searching.
- */
-
- void markmatch(mf, lf, pos)
- ef_fid mf, lf;
- long pos;
- {
- t_entry tent;
-
- gette(mf, pos, &tent);
- tent.t_match = (unsigned) lf->ef_tsize;
- putte(mf, pos, &tent);
- }
- SHAR_EOF
- if test 9885 -ne "`wc -c < 'heur.c'`"
- then
- echo shar: error transmitting "'heur.c'" '(should have been 9885 characters)'
- fi
- fi
- exit 0
- # End of shell archive
-